import scrapy
from scrapy.loader import ItemLoader
from recruit.items import RecruitItem

# Value written into the "source" field of every item the spider yields.
# NOTE(review): presumably a numeric identifier for this job site — confirm
# against whatever consumes RecruitItem.
source = 1
class ZhilianSpider(scrapy.Spider):
    """Spider that scrapes job postings from a Zhaopin search results page.

    Each job card found on the start page is turned into one ``RecruitItem``
    carrying the job name, company, company type, salary, region, required
    experience and required education, plus the module-level ``source`` value.
    """

    name = 'zhilian'
    # Use the registrable domain so that every subdomain is on-site: the start
    # URL lives on ``sou.zhaopin.com``, which ``www.zhaopin.com`` does not cover.
    allowed_domains = ['zhaopin.com']
    start_urls = ['https://sou.zhaopin.com/?jl=575&kw=Java%E5%BC%80%E5%8F%91&p=1']

    def parse(self, response):
        """Yield one loaded ``RecruitItem`` per job card in ``response``.

        Fields whose node is missing from a card are skipped rather than
        aborting the whole page.
        """
        # Debug aid: shows which User-Agent the request was sent with.
        # ``.get`` avoids a KeyError when the header is not set.
        self.logger.debug('aqie %s', response.request.headers.get('User-Agent'))

        job_units = response.xpath('//*[@class="positionlist"]/div[contains(@class,"joblist-box__item")]')
        for unit in job_units:
            loader = ItemLoader(item=RecruitItem(), response=response)
            loader.add_value('job_name', unit.xpath('./a/div[1]/div/span/text()').extract_first())  # job title
            loader.add_value('company', unit.xpath('./a/div[1]/div[2]/span/text()').extract_first())  # company name
            loader.add_value('type', unit.xpath('./a/div[2]/div/span/text()').extract_first())  # company type
            loader.add_value('salary', unit.xpath('./a/div[2]/div/p/text()').extract_first())  # salary

            # The <ul> text holds region, experience and education as
            # whitespace-separated tokens, in that order.
            requirements = unit.xpath('./a/div[2]/div/ul').xpath('string(.)').extract_first()
            region, experience, education = self._split_requirements(requirements)
            loader.add_value('region', region)  # region
            loader.add_value('experience', experience)  # required experience
            loader.add_value('education', education)  # required education

            loader.add_value("source", source)
            yield loader.load_item()

    @staticmethod
    def _split_requirements(text):
        """Split the requirements text into exactly three tokens.

        Splits on any run of whitespace so leading, trailing or repeated
        whitespace cannot shift the fields. Missing tokens (including a
        ``None`` or empty ``text``) are returned as ``None``.
        """
        tokens = (text or '').split()
        tokens += [None] * (3 - len(tokens))
        return tokens[:3]
